'''
Author: bodhi
Date: 2025-05-17 10:06:55
LastEditors: bodhi
LastEditTime: 2025-05-17 10:10:09
'''
import requests
import json

# Upload a local PDF to a dataset through its "create document by file"
# HTTP endpoint, then print the HTTP status code and the response body.

# API endpoint URL; the dataset ID is embedded in the path.
url = 'http://www.orsalab.cn/v1/datasets/7ca56c9c-1864-4ee1-bce4-da9de88d2786/document/create_by_file'

# Request headers.
# NOTE(review): the bearer token is hard-coded in source. Consider loading it
# from an environment variable or a secrets store, and rotate it if this file
# has been shared or committed.
headers = {
    'Authorization': 'Bearer dataset-SjC8WTpFOK3qj7ygFgZyBBAn'
}

# Indexing / pre-processing options sent along with the file. They are
# serialized to JSON and shipped as the "data" form field below.
data_json = {
    "indexing_technique": "economy",
    "process_rule": {
        "rules": {
            "pre_processing_rules": [
                {"id": "remove_extra_spaces", "enabled": True},
                {"id": "remove_urls_emails", "enabled": True}
            ],
            "segmentation": {
                "separator": "\n\n",
                "max_tokens": 500
            }
        },
        "mode": "custom"
    }
}

# Multipart form field carrying the options. The tuple layout is
# (filename, content, content_type); a filename of None makes requests send
# it as an ordinary form part rather than as a file attachment.
form_data = {
    'data': (None, json.dumps(data_json), 'text/plain')
}

# Path of the file to upload.
file_path = r'C:\Users\dlfan\Zotero\storage\BRTWXWB9\Gong.pdf'  # Adjust to the actual file location

# Open the file with a context manager so the handle is closed on every exit
# path, including when the request raises. A failure to open the file still
# propagates as an exception, exactly as it did before.
with open(file_path, 'rb') as pdf_file:
    # File part of the multipart body.
    files = {
        'file': ('Gong.pdf', pdf_file, 'application/pdf')
    }

    # Send the POST request.
    try:
        response = requests.post(
            url,
            headers=headers,
            files={**form_data, **files},
            # (connect, read) timeouts in seconds. Without a timeout,
            # requests waits indefinitely on an unresponsive server.
            timeout=(10, 120),
        )
    except requests.RequestException as e:
        # Covers connection errors, timeouts, invalid URLs, and so on.
        print(f"请求出错: {e}")
    else:
        # Print the response; kept out of the try body so that only
        # request failures are reported as request errors.
        print(f"状态码: {response.status_code}")
        print(f"响应内容: {response.text}")
